Title Page COVID Image

Contents

Scrum Planning

Scrum Planning

Best Practices

Best Practices

EDA

Best Practices

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.offline as pyo
import plotly.io as pio
import plotly.graph_objs as go
from datetime import date
from urllib.request import urlopen
import json
from jupyter_dash import JupyterDash
from dash.dependencies import Input, Output
import dash_html_components as html
import dash_core_components as dcc
import warnings
warnings.filterwarnings('ignore')

"""
Function to web scrape most up to date data from usafacts website so that the code
is always up to date with the most recent COVID-19 information
"""
def scrape_live_data():
    
    # Load datasets dynamically from URLs
    confirm_df = pd.read_csv('https://usafactsstatic.blob.core.windows.net/public/data/covid-19/covid_confirmed_usafacts.csv', header=0)
    # , encoding = "ISO-8859-1"
    pop_df = pd.read_csv('https://usafactsstatic.blob.core.windows.net/public/data/covid-19/covid_county_population_usafacts.csv', header=0)
    
    # Combine into one dataset
    confirmed_cases = pd.merge(left=confirm_df, right=pop_df, how='left', left_on=['countyFIPS','State'], right_on=['countyFIPS','State'])

    # Rename/Drop/Move columns
    # get list of column names 
    all_columns = confirmed_cases.columns.values
    # rename county Name_x with county name
    all_columns[1] = 'County Name'
    # make updated column names list columns of dataframe
    confirmed_cases.columns = all_columns
    # drop additional county names column added on merge
    confirmed_cases.drop(labels=['County Name_y'], axis=1, inplace = True)
    # save the population column so we can add back into df later
    population_column = confirmed_cases['population']
    # drop population column from dataframe
    confirmed_cases.drop(labels=['population'], axis=1, inplace = True)
    # insert population column back in as 3rd column in df
    confirmed_cases.insert(3, 'population', population_column)
    
    indexes = confirmed_cases.loc[[1835, 1862, 1863]].index
    confirmed_cases.drop(indexes, inplace = True)


    return confirmed_cases


def scrape_live_data_deaths():
    """Download the latest USAFacts deaths and county-population CSVs and
    merge them into one frame (same shape as scrape_live_data()).

    Returns:
        pd.DataFrame: one row per county with columns
        [countyFIPS, County Name, State, population, stateFIPS,
        <one column per date>]. Death counts are cumulative.
    """
    # Load datasets dynamically from URLs.
    death_df = pd.read_csv('https://usafactsstatic.blob.core.windows.net/public/data/covid-19/covid_deaths_usafacts.csv', header=0)
    pop_df = pd.read_csv('https://usafactsstatic.blob.core.windows.net/public/data/covid-19/covid_county_population_usafacts.csv', header=0)

    # Combine into one dataset keyed on county FIPS + state.
    confirmed_deaths = pd.merge(left=death_df, right=pop_df, how='left',
                                left_on=['countyFIPS', 'State'],
                                right_on=['countyFIPS', 'State'])

    # The merge suffixes the duplicated county-name column: keep the left
    # one under its original name and drop the right one.
    confirmed_deaths = confirmed_deaths.rename(columns={'County Name_x': 'County Name'})
    confirmed_deaths = confirmed_deaths.drop(labels=['County Name_y'], axis=1)

    # Move population so it sits as the 4th column (index 3), right after
    # the identifying columns.
    population_column = confirmed_deaths.pop('population')
    confirmed_deaths.insert(3, 'population', population_column)

    # Drop known-bad rows by positional label. errors='ignore' keeps this
    # from raising KeyError if the upstream file changes and these labels
    # no longer exist (the old .loc[[...]].index round-trip would crash).
    confirmed_deaths = confirmed_deaths.drop(index=[1835, 1862, 1863], errors='ignore')

    return confirmed_deaths


def exploratoryDA(confirmed_cases):
    """Print a quick exploratory summary of a data frame: head, tail,
    dtypes, per-column counts, and null counts.

    Args:
        confirmed_cases (pd.DataFrame): frame to summarize.
    """
    # checking first few rows to get a sense of the data
    print("##############print first 10 rows:#################")
    print(confirmed_cases.head(10))
    # checking last ten rows
    print("##############print last 10 rows:(##############")
    print(confirmed_cases.tail(10))
    # checking the data types of the columns
    print("##############print column data types:##############")
    print(confirmed_cases.dtypes)
    # getting a sense of count
    print("#############print column counts:##############")
    print(confirmed_cases.count())
    # checking for nulls
    print("##############Check nulls:##############")
    # BUG FIX: a stray "(edited)" chat-paste fragment after this call was
    # a syntax error; removed.
    print(confirmed_cases.isnull().sum())
    
#arrange columns
def createStatePOP(confirmed_cases):
    """Aggregate county populations up to the state level.

    Args:
        confirmed_cases (pd.DataFrame): frame containing 'State' and
            'population' columns.

    Returns:
        pd.DataFrame: two columns, 'State' and 'population' (sum per state).
    """
    # Pivot on State, summing population across all of its counties.
    state_population = confirmed_cases.pivot_table(
        ['population'], ['State'], aggfunc='sum'
    ).reset_index()
    return state_population


def meltData(confirmed_cases):
    """Melt the wide per-date columns into long format and return the
    state-level cumulative confirmed-case total on the latest date.

    Args:
        confirmed_cases (pd.DataFrame): merged frame whose first five
            columns are metadata (countyFIPS, County Name, State,
            population, stateFIPS) followed by one column per date.

    Side effect: writes a long-format back-up to 'df_confirm_long.csv'.

    Returns:
        pd.DataFrame: columns ['State', 'Confirmed'].
    """
    # Date columns start right after the five metadata columns.
    # BUG FIX: was columns[6:], which silently skipped the first date column.
    dates = confirmed_cases.columns[5:]
    df_confirm_long = confirmed_cases.melt(id_vars=['State'], value_vars=dates,
                                           var_name='Date', value_name='Confirmed')
    # Save the long-format data for back-up.
    df_confirm_long.to_csv('df_confirm_long.csv', index=True)
    # Make sure the confirmed cases are numeric and not strings.
    df_confirm_long['Confirmed'] = pd.to_numeric(df_confirm_long['Confirmed'], errors='coerce')
    # Counts are cumulative, so the latest date column holds the to-date
    # totals; keep that date and sum over each state's counties.
    last_date = dates[-1]
    df_confirm_long = (df_confirm_long[df_confirm_long['Date'] == last_date]
                       .groupby(['State'])['Confirmed'].sum().reset_index())

    return df_confirm_long



#need to run after merge, arrange, melt
def createRate(df_statepop, df2_melt):
    """Join state populations onto confirmed-case totals and compute the
    percentage of each state's population with a confirmed case.

    Args:
        df_statepop (pd.DataFrame): ['State', 'population'].
        df2_melt (pd.DataFrame): ['State', 'Confirmed'].

    Returns:
        pd.DataFrame: the cases frame with 'population' and 'Rate'
        (cases as % of population) columns appended.
    """
    # Left-join population onto the per-state case totals.
    merged = pd.merge(left=df2_melt, right=df_statepop, how='left',
                      left_on=['State'], right_on=['State'])
    merged['Rate'] = (merged['Confirmed'] / merged['population']) * 100
    return merged



#need to run after merge, arrange, melt, create rate
#Plot geo map for statewide rate = cases/population
def createUSAMap(createRate):
    """Render a USA choropleth of confirmed cases as a percentage of each
    state's population.

    Args:
        createRate (pd.DataFrame): output of the createRate() step, with
            'State' and 'Rate' columns.

    Side effects: displays the figure and writes 'state_rate.html'.
    """
    # Trace: one colored polygon per state, keyed by 2-letter state code.
    trace = dict(
        type='choropleth',
        colorscale='Portland',
        locations=createRate['State'],
        locationmode='USA-states',
        z=createRate['Rate'],
        text=createRate['State'],
        colorbar={'title': 'Cases/Population%'},
        marker=dict(line=dict(color='rgb(255,255,255)', width=2)),
    )
    # Layout: restrict the map to the USA and render lakes in white.
    layout = dict(
        title='Percentage of COVID-19 Confirmed Cases to Date by Popualtion of the State',
        geo=dict(scope='usa',
                 showlakes=True,
                 lakecolor='rgb(255, 255, 255)'),
    )
    fig = go.Figure(trace)
    fig.update_layout(layout)
    fig.show()
    fig.write_html("state_rate.html")



#Storing data by Month to use for the heat map
#need to run merge before this
def structureDataTimeSeries(confirmed_cases):
    """Convert the wide per-date frame into a long time series of
    confirmed cases aggregated by date and state.

    Args:
        confirmed_cases (pd.DataFrame): merged frame whose first five
            columns are countyFIPS, County Name, State, population,
            stateFIPS, followed by one column per date.

    Returns:
        pd.DataFrame: columns ['Date', 'State', 'Confirmed'], with 'Date'
        converted to datetime64.
    """
    # Drop the columns we don't need and store in a new dataframe.
    df_confirm = confirmed_cases.drop(columns=['population'])
    # After dropping population, four metadata columns remain
    # (countyFIPS, County Name, State, stateFIPS), so dates start at 4.
    # BUG FIX: was columns[5:], which silently skipped the first date.
    dates = df_confirm.columns[4:]
    # Unpivot the date columns into 'Date'/'Confirmed' pairs.
    df_confirm_long = df_confirm.melt(id_vars='State', value_vars=dates,
                                      var_name='Date', value_name='Confirmed')
    # Aggregate county counts up to (date, state).
    df_confirm_long = df_confirm_long.groupby(['Date', 'State'])['Confirmed'].sum().reset_index()
    # Convert date strings to datetime for plotting and sorting.
    df_confirm_long['Date'] = pd.to_datetime(df_confirm_long['Date'])
    return df_confirm_long

#need to run merge, arrange, structure before this
def createHeatMap(df_confirm_long):
    """Build and save a State-by-month heatmap of confirmed cases.

    Args:
        df_confirm_long (pd.DataFrame): long frame from
            structureDataTimeSeries() with 'Date', 'State', 'Confirmed'.
            Note: a 'month' column is added to this frame in place.

    Side effects: writes 'df_cases_months.csv', saves the figure to
    'heatmap.png' and 'heatmapByState.png', and shows it.

    Returns:
        the matplotlib.pyplot module (kept for backward compatibility).
    """
    # Label each row with its month name.
    df_confirm_long['month'] = df_confirm_long['Date'].dt.month_name()
    # We could also derive day names for finer-grained analysis:
    # df_confirm['day'] = df_confirm['date'].dt.day_name()
    # Counts are cumulative, so the max within a month is that month's total.
    df_month = df_confirm_long.groupby(['State', 'month'], sort=False)['Confirmed'].max().reset_index()
    # Save the aggregated frame for back-up.
    df_month.to_csv('df_cases_months.csv', index=True)
    # Pivot into a State x month matrix for the heatmap.
    heatmap_data = pd.pivot_table(df_month, values='Confirmed', index='State', columns='month')
    # Months in first-seen (chronological) order, used to sort the columns.
    months = df_month.month.unique().tolist()
    heatmap_data = heatmap_data.reindex(columns=months)
    # BUG FIX: figsize was (200, 200) *inches*, which with dpi=150 yields
    # an unusably huge ~30000px image; use a readable size instead.
    plt.figure(figsize=(20, 20))
    m = sns.heatmap(heatmap_data, cmap='RdBu_r', robust=True)
    m.set_xticklabels(heatmap_data.columns, rotation=45)
    m.set_yticklabels(heatmap_data.index, rotation=45)
    m.set_title('Confirmed Cases by month for all the States in USA')
    # BUG FIX: both files are now saved BEFORE plt.show(); previously
    # 'heatmapByState.png' was written after show(), which saves a blank
    # figure (show() flushes the current figure — see the empty
    # "<Figure ... with 0 Axes>" in the cell output).
    plt.savefig('heatmap.png', dpi=150)
    plt.savefig("heatmapByState.png")
    plt.show()
    return plt

def createLinePlot(df_confirm_long):
    """Plot confirmed-case trends (log scale) for five populous states.

    Args:
        df_confirm_long (pd.DataFrame): long frame with 'Date', 'State',
            and 'Confirmed' columns.
    """
    # Initialize figure and axis explicitly.
    fig, ax = plt.subplots()
    focus_states = ['TX', 'CA', 'FL', 'AZ', 'NY']
    subset = df_confirm_long[df_confirm_long['State'].isin(focus_states)]
    # Log scale keeps early and late growth visible on one axis.
    ax.set(yscale="log")
    ax.set_title("COVID-19 Cases Trends for AZ, CA, FL, NY, TX")
    # One styled, markered line per state.
    sns.lineplot(x='Date', y='Confirmed', data=subset, hue='State',
                 ax=ax, style='State', markers=True)
    plt.show()
    
def plot_geomap_counties(cases, deaths):
    """Plot an interactive county-level choropleth of total cases, with
    deaths shown on hover.

    Args:
        cases (pd.DataFrame): wide cumulative-cases frame from scrape_live_data().
        deaths (pd.DataFrame): wide cumulative-deaths frame from
            scrape_live_data_deaths().
    """
    # Counts are cumulative, so the latest date column holds the to-date
    # total for every county.
    last_date = cases.columns.tolist()[-1]

    # .copy() so the column assignments below do not write into a view of
    # the caller's frame (SettingWithCopyWarning).
    max_cases = cases[['countyFIPS', 'County Name', 'State', f'{last_date}']].copy()

    deaths = deaths[['countyFIPS', f'{last_date}']]

    # Map each county's death total onto the cases frame via FIPS.
    deaths_dict = dict(zip(deaths['countyFIPS'], deaths[f'{last_date}']))
    max_cases['Deaths'] = max_cases['countyFIPS'].map(deaths_dict)

    # County boundary geojson, keyed by zero-padded 5-character FIPS codes.
    with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
        counties = json.load(response)

    # 4-digit FIPS codes need a leading zero to match the geojson keys.
    # Build an old->new lookup dictionary.
    fips_new = {}
    fips = cases['countyFIPS'].astype(str).tolist()
    for f in fips:
        if len(f) == 4:
            fips_new[f] = '0' + f
        else:
            fips_new[f] = f

    # Make FIPS str and map to the padded form.
    max_cases['countyFIPS'] = max_cases['countyFIPS'].astype(str)
    max_cases['fips'] = max_cases['countyFIPS'].map(fips_new)
    # Exclude the statewide-unallocated rows (FIPS 0).
    # BUG FIX: countyFIPS was cast to str above, so the old comparison to
    # the integer 0 never matched and the filter was a no-op.
    max_cases = max_cases.loc[max_cases['countyFIPS'] != '0']

    # Plot the county heatmap.
    fig = px.choropleth_mapbox(max_cases, geojson=counties, locations='fips', color=f'{last_date}',
                               # color_continuous_scale="tempo",
                               range_color=(1, 5000),
                               mapbox_style="carto-positron",
                               zoom=3, center={"lat": 37.0902, "lon": -95.7129},
                               opacity=0.5,
                               labels={f'{last_date}': '# Cases', 'County Name': 'County: ', 'Deaths': '# Deaths'},
                               hover_data=['County Name', 'Deaths'])
    fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
    fig.show()
    


    
def plot_bar(cases, deaths):
    """Plot state-level bar charts of total cases and total deaths.

    Args:
        cases (pd.DataFrame): wide cumulative-cases frame.
        deaths (pd.DataFrame): wide cumulative-deaths frame.
    """
    # Counts are cumulative, so the latest date column is the to-date total.
    last_date = cases.columns.tolist()[-1]

    # .copy() so the 'Deaths' assignment below does not write into a view
    # of the caller's frame (SettingWithCopyWarning).
    max_cases = cases[['countyFIPS', 'County Name', 'State', f'{last_date}']].copy()

    deaths = deaths[['countyFIPS', f'{last_date}']]
    deaths_dict = dict(zip(deaths['countyFIPS'], deaths[f'{last_date}']))
    max_cases['Deaths'] = max_cases['countyFIPS'].map(deaths_dict)

    # Sum the numeric columns per state. Selecting them explicitly avoids
    # relying on pandas silently dropping the string column
    # ('County Name'), which newer pandas versions no longer do.
    state_counts = max_cases.groupby('State')[['countyFIPS', f'{last_date}', 'Deaths']].sum().reset_index()

    # Rename the aggregated columns.
    state_counts.columns = ['State', 'CountyFIPS', 'NumCases', 'NumDeaths']
    # BUG FIX: sort_values returns a new frame; its result was previously
    # discarded, so the bars were never actually sorted.
    state_counts = state_counts.sort_values(by='NumCases')

    # Bar chart of total cases per state.
    fig = go.Figure(data=[
        go.Bar(name='# Cases', x=state_counts['State'], y=state_counts['NumCases'])
    ])
    fig.update_layout(barmode='group', title='Number of COVID-19 Cases by State')
    fig.show()

    # Bar chart of total deaths per state.
    fig2 = go.Figure(data=[
        go.Bar(name='# Cases', x=state_counts['State'], y=state_counts['NumDeaths'])
    ])
    fig2.update_layout(barmode='group', title='Number of COVID-19 Deaths by State')
    fig2.show()

"""
Function to get the differences in cases for the entire dataset
"""
def get_diff_by_state(cases, state_name):
        
    # add try except block here. Also room for testing to make sure that correct state chosen
    state = cases.loc[cases['State'] == str(state_name)]

    # transpose dataframe so dates are rows    
    stateT = state.T
    
    # make county names the columns for transposed data frame 
    stateT.columns=stateT.iloc[1]
    
    # drop statewide counts
    stateT = stateT.drop(columns='Statewide Unallocated')
    
    # drop the first five rows because that is data we are not interested in in this instance
    stateT = stateT.iloc[5:]
    
    # create empty dictionary so we can store the difference in cases for each county in state
    diff_counts = {}

    for col in stateT:
        # get the column and find the difference for each row
        col_cases = stateT[col].diff()
        
        # append new series to dictionary as value with key as county name
        diff_counts[col] = col_cases

    # create dataframe with differences from each state by  day
    diff_counties = pd.DataFrame.from_dict(diff_counts)
    
    return diff_counties


"""
Function to get the differences in cases for the last week
"""
def get_diff_last_week(cases, state_name):
        
    # add try except block here. Also room for testing to make sure that correct state chosen
    state = cases.loc[cases['State'] == state_name]

    # transpose dataframe so dates are rows    
    stateT = state.T
    
    # make county names the columns for transposed data frame 
    stateT.columns=stateT.iloc[1]
    
    # drop statewide counts
    stateT = stateT.drop(columns='Statewide Unallocated')
    
    # drop the first three rows because that is data we are not interested in in this instance
    stateT = stateT.iloc[5:]
    
    # create empty dictionary so we can store the difference in cases for each county in state
    diff_counts = {}

    for col in stateT:
        # get the column and find the difference for each row
        col_cases = stateT[col].diff()
        
        # append new series to dictionary as value with key as county name
        diff_counts[col] = col_cases

    # create dataframe with differences from each state by  day
    diff_counties = pd.DataFrame.from_dict(diff_counts)
    
    # get the date for today so that we can update our data for last week
    today = pd.to_datetime(date.today())
    
    # get the date for a week prior so that we can filter our datetime column
    week = today - pd.Timedelta(days=14)
    
    # reset index so that we can get the Date as a column
    diff_counties = diff_counties.reset_index().rename(columns={'index':'Date'})
    
    # convert the date to pandas datetime format
    diff_counties['Date'] = pd.to_datetime(diff_counties['Date'], infer_datetime_format=True)
    
    # get the data where the date is greater than the last week
    diff_counties = diff_counties.loc[diff_counties['Date'] >= week]
    
    diff_counties.index = pd.to_datetime(diff_counties['Date'], format = '%m/%d/%Y')
    diff_counties.index = diff_counties.index.strftime('%Y-%m-%d')
    
    diff_counties = diff_counties.drop(columns=['Date'])
    
    return diff_counties 
In [4]:
# Scrape the latest cumulative cases and deaths frames from usafacts.org.
confirmed_cases = scrape_live_data()
confirmed_deaths = scrape_live_data_deaths()

Wide Date

In [6]:
#arrange columns to produce geo map
statePOP = createStatePOP(confirmed_cases)
#melt
meltData_df = meltData(confirmed_cases)
#create cases/population rate to create map
# NOTE(review): this assignment shadows the createRate() *function* with
# its DataFrame result, so re-running this cell raises
# "TypeError: 'DataFrame' object is not callable". Binding the result to a
# different name (e.g. rate_df) — and updating the cells that read
# `createRate` downstream — would make the notebook re-runnable.
createRate = createRate(statePOP,meltData_df)

#structure for heat and line plot
df_confirm_long = structureDataTimeSeries(confirmed_cases)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-6-fa903d020b3d> in <module>
      4 meltData_df = meltData(confirmed_cases)
      5 #create cases/popualtion to create map
----> 6 createRate = createRate(statePOP,meltData_df)
      7 
      8 #structure for heat and line plot

TypeError: 'DataFrame' object is not callable

Transformations

Confirmed

Line

HeatMap

In [7]:
#geomap of state-level case rates. NOTE(review): `createRate` here is the
# DataFrame produced by the earlier transformation cell (the name shadows
# the createRate() function), so this cell only works after that cell ran.
createUSAMap(createRate)

plot_bar(confirmed_cases, confirmed_deaths)

#Line plot
createLinePlot(df_confirm_long)
In [13]:
#HEATMAP. NOTE(review): createHeatMap returns the pyplot module, which is
# why Out[] shows a module repr; suppress it with a trailing semicolon.
createHeatMap(df_confirm_long)
Out[13]:
<module 'matplotlib.pyplot' from 'C:\\Users\\smn7ba\\Anaconda3\\lib\\site-packages\\matplotlib\\pyplot.py'>
<Figure size 432x288 with 0 Axes>

HeatMap

Geo Map

In [8]:
# County-level geo map of cumulative cases, with deaths on hover.
plot_geomap_counties(confirmed_cases, confirmed_deaths)

New Cases 1

New Cases 2

In [9]:
# Interactive dashboard: heatmap of daily new cases per county for a
# user-selected state, over the whole dataset.
# NOTE(review): `dash_html_components`/`dash_core_components` (imported at
# the top of the notebook) are deprecated in Dash 2.x in favor of
# `from dash import html, dcc` — confirm the installed Dash version.
app = JupyterDash(__name__)
app.layout = html.Div([
    html.H3("New COVID Cases Per Day Demo"),
    dcc.Graph(id='graph'),
    html.Label([
        "State",
        dcc.Dropdown(
            id='state-name', clearable=False,
            value='AL', options=[
                {'label': c, 'value': c}
                for c in confirmed_cases['State'].unique().tolist()
            ])
    ]),
])
# Define callback to update graph whenever the state dropdown changes
@app.callback(
    Output('graph', 'figure'),
    [Input("state-name", "value")]
)
def update_figure(state_name):
    # Heatmap of daily deltas: x = date, y = county, z = new cases.
    diff_counties = get_diff_by_state(confirmed_cases, state_name)
    fig2 = go.Figure(data=go.Heatmap(
        z=[diff_counties[i] for i in diff_counties.columns],
        x=diff_counties.index.tolist(),
        y=diff_counties.columns.tolist(),
        colorscale='Viridis'))
    
    return fig2

# Run app and display result inline in the notebook
app.run_server(mode='inline', port=8051)
In [8]:
# Interactive dashboard: heatmap of daily new cases per county for a
# user-selected state, restricted to the most recent two weeks.
# NOTE(review): this rebinds `app` and redefines `update_figure` from the
# previous dashboard cell in the kernel namespace; the two apps still run
# independently because they use different ports.
app = JupyterDash(__name__)
app.layout = html.Div([
    html.H1("New COVID Cases Per Day In the Last Two Week Demo"),
    dcc.Graph(id='graph'),
    html.Label([
        "State",
        dcc.Dropdown(
            id='state-name', clearable=False,
            value='AL', options=[
                {'label': c, 'value': c}
                for c in confirmed_cases['State'].unique().tolist()
            ])
    ]),
])
# Define callback to update graph whenever the state dropdown changes
@app.callback(
    Output('graph', 'figure'),
    [Input("state-name", "value")]
)
def update_figure(state_name):
    # Heatmap of daily deltas over the last-two-weeks window.
    diff_counties = get_diff_last_week(confirmed_cases, state_name) 
    fig2 = go.Figure(data=go.Heatmap(
        z=[diff_counties[i] for i in diff_counties.columns],
        x=diff_counties.index.tolist(),
        y=diff_counties.columns.tolist(),
        colorscale='Viridis'))
    
    return fig2
# Run app and display result inline in the notebook
app.run_server(mode='inline',port=8060)

Testing

In [10]:
import unittest
import datetime
from datetime import timedelta

# inherit from unittest.TestCase
class WebScraperTestCase(unittest.TestCase):

    def setUp(self):
        print("\n*** SETUP ***\n")
        # Get the latest data
        self.confirmed_cases = scrape_live_data()

    def tearDown(self):
        print("\n*** TEARDOWN ***\n")
        # Remove the scraped data
        del self.confirmed_cases
    
    # TEST - 1 #
    # Is scrape_live_data() method successfully getting oldest data for COVID cases
    def test_1_is_oldest_data_scraped(self):
        print("Verify web scraped data starts from January 22, 2020")
        # Last date must be for yesterday (1 day ago) - reformatted without leading 0
        first_day = datetime.datetime(2020, 1, 22).strftime("%x").lstrip("0").replace(" 0", " ")
        self.assertEqual(self.confirmed_cases.columns[5], first_day)

    # TEST - 2 #
    # Is scrape_live_data() method successfully getting latest data for COVID cases
    def test_2_is_latest_data_scraped(self):
        print("Verify web scraped data is current (till yesterday)")
        # Last date must be for yesterday (1 day ago) - reformatted without leading 0
        yesterday = (date.today() - timedelta(days = 1)).strftime("%x").lstrip("0").replace(" 0", " ")
        self.assertEqual(self.confirmed_cases.columns[-1], yesterday)

    # TEST - 3 #
    # Is scrape_live_data() method successfully getting required columns for data analysis
    def test_3_is_required_data_scraped(self):
        print("Verify web scraped data contains required columns")
        required_columns = ["countyFIPS", "County Name", "State", "population", "stateFIPS"]
        self.assertEqual(self.confirmed_cases.columns[0:5].tolist(), required_columns)
        
    # TEST - 4 #
    # Is createStatPOP() method returning data in required format
    def test_4_is_population_data_formatted(self):
        print("Verify state-wise population data is in required format")
        state_population = createStatPOP(self.confirmed_cases)
        headers = ["State", "population"]
        self.assertEqual(state_population.columns.tolist(), headers)

    # TEST - 5 #
    # Is createStatPOP() method returning data for all 50 states
    def test_5_is_population_data_for_all_states(self):
        print("Verify state-wise population data exists for all 50 states")
        state_population = createStatPOP(self.confirmed_cases)
        self.assertEqual(len(state_population.index), 51)
        
    # TEST - 6 #
    # Is meltData() method returning data in required format
    def test_6_is_pivot_data_formatted(self):
        print("Verify date-wise data for each state is in required format")
        melted_data = meltData(self.confirmed_cases)
        headers = ["State", "Confirmed"]
        self.assertEqual(melted_data.columns.tolist(), headers)
        
    # TEST - 7 #
    # Is structureDataTimeSeries() method returning data in required format
    def test_7_is_time_series_data_formatted(self):
        print("Verify time-series data is in required format")
        time_series_data = structureDataTimeSeries(self.confirmed_cases)
        headers = ["State", "month", "Confirmed"]
        self.assertEqual(time_series_data.columns.tolist(), headers)

if __name__ == '__main__':
    unittest.main(argv=['first-arg-is-ignored'], exit=False)
*** SETUP ***

.
Verify web scraped data starts from January 22, 2020

*** TEARDOWN ***


*** SETUP ***

F
Verify web scraped data is current (till yesterday)

*** TEARDOWN ***


*** SETUP ***

.
Verify web scraped data contains required columns

*** TEARDOWN ***


*** SETUP ***

E
Verify state-wise population data is in required format

*** TEARDOWN ***


*** SETUP ***

E
Verify state-wise population data exists for all 50 states

*** TEARDOWN ***


*** SETUP ***

Verify date-wise data for each state is in required format
.
*** TEARDOWN ***


*** SETUP ***

F
Verify time-series data is in required format

*** TEARDOWN ***

======================================================================
ERROR: test_4_is_population_data_formatted (__main__.WebScraperTestCase)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "<ipython-input-10-20df42ce81e4>", line 45, in test_4_is_population_data_formatted
    state_population = createStatPOP(self.confirmed_cases)
NameError: name 'createStatPOP' is not defined

======================================================================
ERROR: test_5_is_population_data_for_all_states (__main__.WebScraperTestCase)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "<ipython-input-10-20df42ce81e4>", line 53, in test_5_is_population_data_for_all_states
    state_population = createStatPOP(self.confirmed_cases)
NameError: name 'createStatPOP' is not defined

======================================================================
FAIL: test_2_is_latest_data_scraped (__main__.WebScraperTestCase)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "<ipython-input-10-20df42ce81e4>", line 32, in test_2_is_latest_data_scraped
    self.assertEqual(self.confirmed_cases.columns[-1], yesterday)
AssertionError: '8/4/20' != '8/04/20'
- 8/4/20
+ 8/04/20
?   +


======================================================================
FAIL: test_7_is_time_series_data_formatted (__main__.WebScraperTestCase)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "<ipython-input-10-20df42ce81e4>", line 70, in test_7_is_time_series_data_formatted
    self.assertEqual(time_series_data.columns.tolist(), headers)
AssertionError: Lists differ: ['Date', 'State', 'Confirmed'] != ['State', 'month', 'Confirmed']

First differing element 0:
'Date'
'State'

- ['Date', 'State', 'Confirmed']
+ ['State', 'month', 'Confirmed']

----------------------------------------------------------------------
Ran 7 tests in 15.276s

FAILED (failures=2, errors=2)

Beyond Specs

Conclusion

In [ ]: